// 0. This do-file cleans the raw SIPP data from the Census Bureau to produce our analysis data file.
//    The raw SIPP files can be found at https://www.nber.org/research/data/survey-income-and-program-participation-sipp for the 1996-2008 Panels and at https://www.census.gov/programs-surveys/sipp/data.html for the 2014-2018 Panels.
//    We keep households that are present for at least 20 months (five consecutive waves) in the 1996-2008 Panels and for at least 24 months (two consecutive waves) in the 2014-2018 Panels, and we flag (and keep) those households that receive at least one of our analyzed programs in at least one month.
//    We then recode certain variables so that they are aligned across all panels (the 2014 redesign led to a realignment of variables) and, using this merged dataset, calculate the variables needed for subsequent analyses.

**** Set globals 
* Session settings plus the folder paths used throughout this do-file.
* Every "[FILL IN]" placeholder must be replaced before running.
set more off
clear all

global dir_sipp "[FILL IN]" // FILL IN WITH FOLDER PATH FOR RAW SIPP FILES
global dir_data "[FILL IN]" // FILL IN WITH FOLDER PATH FOR MAIN FILES
* The two globals below are referenced later in this do-file (PCE import and state
* unemployment import). Left undefined they expand to an empty string, so the
* imports would look for the files at the root of the file system.
global dir_pce "[FILL IN]" // FILL IN WITH FOLDER PATH FOR PCEPI.csv
global dir_stateunemp "[FILL IN]" // FILL IN WITH FOLDER PATH FOR state_unemployment_through2023
global temp "`c(tmpdir)'" // Stata's temporary directory, used for intermediate files

**************************************
******Construct Analysis Samples******
**************************************
cd "${dir_sipp}"

* 1996-2008 Panels: for each panel, keep households observed in at least five
* consecutive waves, flag monthly and ever-receipt of each analyzed program,
* keep households with receipt of at least one program in at least one month,
* and save the result as a temporary file.
foreach year in 1996 2001 2004 2008 {
	use sipp_`year', clear

	**** Keep households present for at least five consecutive waves 
	* NOTE(review): -capture- suppresses any error from this command, including a
	* missing swave or spanel variable -- confirm that is intended
	capture drop if swave==16 & spanel==2008 // Drop Wave 16 of 2008 SIPP Panel
	* NOTE(review): nvals() is not an official egen function; presumably it comes
	* from the user-written egenmore package -- confirm it is installed
	egen swave_count = nvals(swave), by(spanel ssuid shhadid)	
	
	egen min_wave = min(swave), by(spanel ssuid shhadid)
	egen max_wave = max(swave), by(spanel ssuid shhadid)
	
	* Waves are consecutive exactly when last wave = first wave + number of distinct waves - 1
	gen max_wave_temp = (min_wave + swave_count) - 1
	gen match = (max_wave==max_wave_temp)
	
	keep if swave_count>=5 & match==1
		
	**** Calculate income-to-poverty ratio
	* The 1996 branch divides thpov by 12; the other panels use rhpov directly
	if `year'==1996 {
		gen incpov = thtotinc/(thpov/12)
	}
	
	else {
		gen incpov = thtotinc/rhpov
	}		

	**** Flag households with program receipt in at least one month
	* Stata treats missing values as larger than any number, so a bare (x>0)
	* would flag receipt whenever the amount is missing. The amount-based flags
	* below therefore also require a non-missing amount (x<.).
	** SNAP
	gen snap_rec = (thfdstp>0 & thfdstp<.)
	egen snap_rec_any = max(snap_rec), by(spanel ssuid shhadid)
	
	** TANF
	gen tanf_rec = (thafdc>0 & thafdc<.)
	egen tanf_rec_any = max(tanf_rec), by(spanel ssuid shhadid)
	
	** Medicaid
	* Person-level flag -> household-month flag -> household ever-receipt flag
	gen medicaid_rec_temp = (ecdmth==1)
	egen medicaid_rec = max(medicaid_rec_temp), by(spanel ssuid shhadid rhcalyr rhcalmn)
	egen medicaid_rec_any = max(medicaid_rec), by(spanel ssuid shhadid)
	
	** Workers' compensation
	gen wc_rec_temp = (er10==1)
	egen wc_rec = max(wc_rec_temp), by(spanel ssuid shhadid rhcalyr rhcalmn)
	egen wc_rec_any = max(wc_rec), by(spanel ssuid shhadid)	
	
	** VA benefits
	gen vet_rec_temp = (er08==1)
	egen vet_rec = max(vet_rec_temp), by(spanel ssuid shhadid rhcalyr rhcalmn)
	egen vet_rec_any = max(vet_rec), by(spanel ssuid shhadid)	
	
	** UI
	gen ui_rec = (thunemp>0 & thunemp<.)
	egen ui_rec_any = max(ui_rec), by(spanel ssuid shhadid)	
	
	** DI
	* rowtotal() treats missing components as zero, so tpoasdi is never missing
	egen tpoasdi = rowtotal(t01amta t01amtk)	
	gen di_rec_temp = (tpoasdi>0 & (eresnss1==2 | eresnss2==2))
	egen di_rec = max(di_rec_temp), by(spanel ssuid shhadid rhcalyr rhcalmn)
	egen di_rec_any = max(di_rec), by(spanel ssuid shhadid)
	
	** SSI
	gen ssi_rec = (thssi>0 & thssi<.)
	egen ssi_rec_any = max(ssi_rec), by(spanel ssuid shhadid)	
	
	** WIC
	gen wic_rec_temp = (ewicyn==1)
	egen wic_rec = max(wic_rec_temp), by(spanel ssuid shhadid rhcalyr rhcalmn)
	egen wic_rec_any = max(wic_rec), by(spanel ssuid shhadid)
	
	**** Keep households that receive at least one program in at least one month
	keep if (snap_rec_any==1 | tanf_rec_any==1 | medicaid_rec_any==1 | ///
		wc_rec_any==1 | vet_rec_any==1 | ui_rec_any==1 | di_rec_any==1 | ///
		ssi_rec_any==1 | wic_rec_any==1)
	
	**** Save temporary files 
	drop *_rec_temp 
	
	qui compress
	save "${temp}/sipp_`year'_sample", replace
}

* Offsets used to turn the wave number into a calendar year (year = offset + wave)
global startyear_2014 2012
global startyear_2018 2016

* 2014-2018 Panels: keep households observed in at least two consecutive waves,
* build calendar year and month, flag program receipt by household-month and
* ever, keep households with any receipt, and save a temporary file.
foreach year in 2014 2018 {
	use sipp_`year', clear

	**** Keep households present for at least two consecutive years  
	egen swave_count = nvals(swave), by(spanel ssuid shhadid)	
	egen min_wave = min(swave), by(spanel ssuid shhadid)
	egen max_wave = max(swave), by(spanel ssuid shhadid)
	
	* Consecutive waves imply last wave = first wave + number of distinct waves - 1
	gen max_wave_temp = min_wave + swave_count - 1
	gen match = (max_wave_temp==max_wave)
	
	drop if !(swave_count>=2 & match==1)
		
	**** Calculate income-to-poverty ratio
	gen incpov = thtotinc/rhpov
	
	**** Construct calendar year and month
	gen rhcalyr = ${startyear_`year'} + swave 
	gen rhcalmn = monthcode 	

	**** Flag households with program receipt in at least one month
	* The SNAP and WIC amount variables carry different names in the two panels
	if `year'==2014 {
		local snap_amt tfs_amt
		local wic_amt twicamt
	}
	else {
		local snap_amt tsnap_amt
		local wic_amt twic_amt
	}

	* Person-level receipt condition for each program
	local cond_snap "`snap_amt'!=."
	local cond_tanf "ttanf_amt!=."
	local cond_medicaid "emdmth==1"
	local cond_wc "twcamt!=."
	local cond_vet "tva1amt!=. | tva2amt!=."
	local cond_ui "tuc1amt!=. | tuc2amt!=."
	local cond_di "esssmnyn==1 & essrsn2yn==1"
	local cond_ssi "tssi_amt!=."
	local cond_wic "`wic_amt'!=."

	* Person-level flag -> household-month flag -> household ever-receipt flag
	foreach prog in snap tanf medicaid wc vet ui di ssi wic {
		gen `prog'_rec_temp = (`cond_`prog'')
		egen `prog'_rec = max(`prog'_rec_temp), by(spanel ssuid shhadid rhcalyr rhcalmn)
		egen `prog'_rec_any = max(`prog'_rec), by(spanel ssuid shhadid)
	}

	**** Keep households with receipt of at least one program in at least one month
	drop if snap_rec_any!=1 & tanf_rec_any!=1 & medicaid_rec_any!=1 & ///
		wc_rec_any!=1 & vet_rec_any!=1 & ui_rec_any!=1 & di_rec_any!=1 & ///
		ssi_rec_any!=1 & wic_rec_any!=1
	
	**** Save temporary files 
	drop *_rec_temp 
	
	quietly compress
	save "${temp}/sipp_`year'_sample", replace
}

**** Create consistent frames for 2014 and 2018
* Reduce the 2014/2018 samples to the variables used later and rename/recode them
* to the variable names used for the earlier panels, so the files can be appended.
foreach year in 2014 2018 {
	use "${temp}/sipp_`year'_sample", clear

	** Keep relevant variables 
	* Variables kept for both panels
	local common spanel ssuid shhadid swave monthcode rhcalmn rhcalyr erelrpe pnum ///
		rhpov thtotinc edisabl ems rmesr tage renroll tpearn ///
		tpprpinc tsssamt tsscamt tret6amt tret7amt tssi_amt tuc1amt tuc2amt tva1amt tva2amt ///
		twcamt tdis1amt tdis2amt ttanf_amt tga_amt tfccamt tcsamt taliamt ///
		tret1amt tret2amt tret5amt tret3amt tret4amt tlifeamt tret8amt tlmpamt ///
		tminc_amt essrsn2yn efree_lunch *_rec *_rec_any ///
		wpfinwgt min_wave max_wave swave_count esex eeduc erace eorigin etenure ///
		tehc_metro tehc_st

	** Keep panel-specific variables and rename/recast them
	if `year'==2014 {
		keep `common' twicamt tfs_amt elunch_yn

		rename twicamt t25amt 
		rename tfs_amt t27amt 
		rename elunch_yn rlunch_chld
	}
	
	else {
		keep `common' twic_amt tsnap_amt tdraw_amt rlunch_chld

		rename twic_amt t25amt 
		rename tsnap_amt t27amt 
		rename tdraw_amt t42amt		
	}	
	
	* Household weight: the person weight of the record(s) with erelrpe 1 or 2,
	* spread to every person in the household-month. The mean must be taken over
	* whfnwgt_temp (missing for everyone else), not over wpfinwgt, which would
	* average the person weights of all household members.
	gen whfnwgt_temp = wpfinwgt if inlist(erelrpe,1,2)
	egen whfnwgt = mean(whfnwgt_temp), by(spanel ssuid shhadid rhcalyr rhcalmn)
	drop whfnwgt_temp wpfinwgt 
	
	* Person number of the record(s) with erelrpe 1 or 2, spread to the household-month
	gen ehrefper_temp = pnum if inlist(erelrpe,1,2)
	egen ehrefper = mean(ehrefper_temp), by(spanel ssuid shhadid rhcalyr rhcalmn)
	drop ehrefper_temp erelrpe
	
	rename monthcode srefmon 
	rename pnum epppnum 
	
	rename tsssamt t01amta 
	rename tsscamt t01amtk
	rename tret6amt t02amt 
	rename tssi_amt t03amta 
	rename tuc1amt t05amt 
	rename tuc2amt t06amt 

	* Sum of the two VA amounts; missing only when both components are missing
	gen t08amt = tva1amt + tva2amt if tva1amt!=. & tva2amt!=.
	replace t08amt = tva1amt if tva1amt!=. & tva2amt==.
	replace t08amt = tva2amt if tva1amt==. & tva2amt!=.	
	
	rename twcamt t10amt 
	rename tdis1amt t13amt 
	rename tdis2amt t14amt 
	rename ttanf_amt t20amt 
	rename tga_amt t21amt 
	rename tfccamt t23amt 
	rename tcsamt t28amt 
	rename taliamt t29amt 
	rename tret1amt t30amt 
	rename tret2amt t31amt
	
	* Sum of tret5amt and tret7amt; missing only when both components are missing
	gen t32amt = tret5amt + tret7amt if tret5amt!=. & tret7amt!=.
	replace t32amt = tret5amt if tret5amt!=. & tret7amt==.
	replace t32amt = tret7amt if tret5amt==. & tret7amt!=.
	
	rename tret3amt t34amt 
	rename tret4amt t35amt 
	rename tlifeamt t36amt 
	rename tret8amt t38amt 
	rename tlmpamt t39amt 
	rename tminc_amt t56amt 
	
	* Both reason variables are built from the same flag: essrsn2yn==1 maps to
	* code 2 and essrsn2yn==2 maps to code 1
	gen eresnss1 = 1 if essrsn2yn==2
	replace eresnss1 = 2 if essrsn2yn==1
	gen eresnss2 = 1 if essrsn2yn==2
	replace eresnss2 = 2 if essrsn2yn==1
	drop essrsn2yn
	
	* efree_lunch codes 1 and 2 both map to efreelun 1; code 3 maps to 2
	gen efreelun = 1 if efree_lunch==1
	replace efreelun = 1 if efree_lunch==2
	replace efreelun = 2 if efree_lunch==3 
	drop efree_lunch 
	
	* Count of persons in the household-month with rlunch_chld equal to 1 or 2,
	* filled in only on records with efreelun==1
	gen rnklun_temp = 1 if (inlist(rlunch_chld,1,2))
	egen rnklun = sum(rnklun_temp) if efreelun==1, ///
		by(spanel ssuid shhadid rhcalyr rhcalmn)
	drop rnklun_temp rlunch_chld 
	
	rename eeduc eeducate 
	
	replace tehc_metro = 3 if tehc_metro==0
	rename tehc_metro tmetro
	
	rename tehc_st tfipsst 

	** Save
	compress
	save "${temp}/sipp_`year'_sample_rev", replace	
}

**** Append across panels
* Stack the six panel files: the 1996 file is loaded first and the rest are
* appended in chronological order. The 2014 and 2018 panels use the harmonized
* files, which carry the _rev suffix.
foreach yr in 1996 2001 2004 2008 2014 2018 {
	local sfx = cond(`yr'>=2014, "_sample_rev", "_sample")

	if `yr'==1996 {
		use "${temp}/sipp_`yr'`sfx'", clear
	}
	else {
		quietly append using "${temp}/sipp_`yr'`sfx'"
	}
}

quietly compress
	
***********************************************
******Set Up for Income Dynamics Analysis******
***********************************************
* Restrict the appended file to the variables used in the remainder of the do-file
keep spanel ssuid shhadid swave srefmon rhcalmn rhcalyr ehrefper epppnum ///
	rhpov thpov thtotinc edisabl ems rmesr tage renroll tpearn ///
	tpprpinc t*amt* eresnss* efreelun rnklun th* eegyamt *_rec *_rec_any ///
	whfnwgt min_wave max_wave swave_count esex ///
	eeducate erace eorigin etenure tmetro tfipsst ewicyn t25amt

**** Construct indicators for poverty
** Replace RHPOV with THPOV/12 for 1996 panel
replace rhpov = thpov/12 if spanel==1996

** Indicators: household income at or below 50%, 100%, and 150% of the threshold
local mult_deeppoor .5
local mult_poor 1
local mult_nearpoor 1.5

foreach lvl in deeppoor poor nearpoor {
	gen `lvl' = (thtotinc<=rhpov*`mult_`lvl'')
}

**** Construct household-level indicators 
** Binary individual-level indicators
gen disabled = (edisabl==1)
gen sepdivwid = (inlist(ems,3,4,5))
gen unemployed = (inlist(rmesr,6,7))
gen nilf_25to59 = (rmesr==8 & inrange(tage,25,59))
gen notworking = (unemployed | nilf_25to59)
gen kid_0to4 = (tage<=4)
gen elderly = (tage>=65)
		
** Household-month maximum of each individual-level indicator
foreach flag in disabled sepdivwid unemployed nilf_25to59 notworking kid_0to4 elderly {
	egen `flag'_any = max(`flag'), by(spanel ssuid shhadid swave srefmon)
}	
	
**** Construct demographic variables 	
** Female
gen female = (esex==2)

** Race/ethnicity
* One dummy per erace code 1 through 4, in this order
local code = 0
foreach grp in white black amind asian {
	local ++code
	gen `grp' = (erace==`code')
}

* 1996 and 2001 panels: eorigin codes 20 through 28 become 1 and everything else
* becomes 2; later panels keep eorigin as is
gen eorigin_new = eorigin if inlist(spanel,2004,2008,2014,2018)
replace eorigin_new = cond(inrange(eorigin,20,28), 1, 2) if inlist(spanel,1996,2001)

gen hispanic = (eorigin_new==1)

* 2014 and 2018 panels: amind is set to missing and asian is taken from erace code 3
replace amind = . if inlist(spanel,2014,2018)
replace asian = (erace==3) if inlist(erace,3,4) & inlist(spanel,2014,2018)

gen white_nonhisp = (white==1 & hispanic==0)
gen black_nonhisp = (black==1 & hispanic==0)

** Education level
gen lessthanhs = (inrange(eeducate,31,38))
gen hs_grad = (eeducate==39)
gen somecollege = (inlist(eeducate,40,41,42,43))
gen ba = (inrange(eeducate,44,47)) 
gen college = (somecollege==1 | ba==1)

** State
* Codes 60 and 61 are blanked for 2014/2018 before 61 and 62 are reused below
* as combined-state codes, so the order of these statements matters
replace tfipsst = . if inlist(tfipsst,60,61) & inlist(spanel,2014,2018) // Drop FIPS if overseas
local later_panels "inlist(spanel,2004,2008,2014,2018)"
replace tfipsst = 61 if inlist(tfipsst,23,50) & `later_panels' // Combine ME, VT
replace tfipsst = 62 if inlist(tfipsst,38,46,56) & `later_panels' // Combine ND, SD, WY
	
** Calculate number of adults and children
gen adult = (tage>=18)
gen child = !adult

egen num_adults = total(adult), by(spanel ssuid shhadid swave srefmon)
egen num_children = total(child), by(spanel ssuid shhadid swave srefmon)

** Household type
gen hh_eld = elderly_any 
gen hh_withchild_noeld = (elderly_any==0 & num_children>0)
gen hh_nochild_noeld = (elderly_any==0 & num_children==0)

compress

**** Construct various income variables
* Builds person-level income aggregates, sums them to the household-month,
* adds WIC and an imputed school lunch value, and expresses household income
* as a share of the poverty threshold.

** Align unemployment insurance amounts across panels
* NOTE(review): after this line t06amt equals t07amt for the 1996 and 2001 panels,
* and both variables enter tpsocins below, so t07amt appears to be counted twice
* for those panels -- confirm whether that is intended
replace t06amt = t07amt if inlist(spanel,1996,2001)
		
** Cash income
* rowtotal() treats missing components as zero
egen tpmktinc = rowtotal(tpearn tpprpinc t12amt t13amt t14amt t15amt t23amt ///
	t26amt t28amt t29amt t30amt t31amt t32amt t34amt t35amt t36amt t37amt t38amt ///
	t50amt t51amt t52amt t53amt t55amt t56amt)
				
egen tpsocins = rowtotal(t01amta t01amtk t02amt t05amt t06amt t07amt t09amt ///
	t10amt t60amtg t60amtt t75amt) 
		
egen tpmtcash = rowtotal(t03amta t03amtk t04amt t08amt t20amt t21amt ///
	t24amt t61amt t62amt t64amt)	

* Household-month totals of the person-level aggregates
foreach var in mktinc socins mtcash {
	egen th`var' = sum(tp`var'), by(spanel ssuid shhadid swave srefmon)
}			
		
** Calculate/impute certain non-monetary transfers to be at household level
* WIC
egen thwic = sum(t25amt), by(spanel ssuid shhadid swave srefmon)

* School lunch
* One rate per calendar year, used in the imputation below
global schoollunch_rr_1995 1.76
global schoollunch_rr_1996 1.8
global schoollunch_rr_1997 1.84
global schoollunch_rr_1998 1.89
global schoollunch_rr_1999 1.94
global schoollunch_rr_2000 1.98
global schoollunch_rr_2001 2.02
global schoollunch_rr_2002 2.09	
global schoollunch_rr_2003 2.14
global schoollunch_rr_2004 2.19
global schoollunch_rr_2005 2.24
global schoollunch_rr_2006 2.32
global schoollunch_rr_2007 2.40
global schoollunch_rr_2008 2.47
global schoollunch_rr_2009 2.57
global schoollunch_rr_2010 2.68
global schoollunch_rr_2011 2.72
global schoollunch_rr_2012 2.77
global schoollunch_rr_2013 2.86
global schoollunch_rr_2014 2.93
global schoollunch_rr_2015 2.98
global schoollunch_rr_2016 3.07
global schoollunch_rr_2017 3.16
global schoollunch_rr_2018 3.24 // Imputed as average of surrounding years 
global schoollunch_rr_2019 3.31
global schoollunch_rr_2020 3.41
		
* Imputed value = rnklun x 20 x the rate for the calendar year, for records with
* efreelun==1. The loop must cover every year that has a rate (1995-2020):
* stopping at 2013 would leave the value at zero for all later calendar years,
* which are the years covered by the 2014 and 2018 panels.
gen thschoollunch = .
forvalues year = 1995/2020 {
	gen thschoollunch_`year' = rnklun * 20 * ${schoollunch_rr_`year'} ///
		if efreelun==1
	replace thschoollunch = thschoollunch_`year' if rhcalyr==`year'	
}
replace thschoollunch = 0 if thschoollunch==.
		
** Total income and transfers
* NOTE(review): thfdstp and eegyamt are not among the variables kept for the
* 2014 and 2018 panels, so they are presumably missing there and contribute zero
* to this total for those panels -- confirm whether SNAP (t27amt) should be
* summed to the household for 2014/2018
egen thallinc = rowtotal(thmktinc thsocins thmtcash thfdstp thwic thschoollunch eegyamt)

drop t*amt*

** As share of poverty threshold
foreach var of varlist thmktinc* thallinc* {
	gen `var'_povshare = `var'/rhpov
}

compress

**** Keep only household heads
* Retain the record whose person number equals the household reference person number
drop if ehrefper!=epppnum

**** Align 2014 and 2018 waves/reference months with prior years 
* Each 12-month wave of the 2014/2018 panels is split into three 4-month waves:
* original wave w and months 1-4, 5-8, 9-12 become waves 3(w-1)+1, 3(w-1)+2, 3(w-1)+3.
* The original wave and reference month are preserved in swave_YYYY and srefmon_YYYY.
foreach year in 2014 2018 {
	gen swave_`year' = swave if spanel==`year'
	gen srefmon_`year' = srefmon if spanel==`year'

	forvalues w = 1/4 {
		forvalues part = 1/3 {
			local first_mon = 4*(`part' - 1) + 1
			local last_mon = 4*`part'
			local new_wave = 3*(`w' - 1) + `part'

			replace swave = `new_wave' if swave_`year'==`w' & ///
				inrange(srefmon_`year',`first_mon',`last_mon')
		}
	}
}
		
**** Construct program indicators
* For each program: receipt by wave, flags for no receipt in the first 2 to 6
* waves a household is observed, and months since first receipt.

** Calculate month indicator
* Consecutive month index with four months per wave. An index built by grouping
* on (swave, srefmon) is not consecutive here: srefmon runs 1-12 in the 2014/2018
* panels but 1-4 in the earlier panels, so the pooled data contain up to eight
* distinct reference months per wave and the resulting index has gaps.
gen month = (swave - 1)*4 + mod(srefmon - 1, 4) + 1

** First wave observed, in the same units as swave
* min_wave was computed before the 2014/2018 waves were split into 4-month waves,
* so for those panels the first wave is recomputed from the realigned swave
egen first_wave_tmp = min(swave), by(spanel ssuid shhadid)
replace first_wave_tmp = min_wave if !inlist(spanel,2014,2018)

foreach prog in di ui wc vet snap tanf medicaid ssi wic {
	** Any receipt in wave  
	* Mean receipt over the months of wave i, spread to every record of the household
	* (zero exactly when there is no receipt in that wave, missing when the wave is
	* not observed). The second egen must reference the _temp variable explicitly
	* rather than rely on variable-name abbreviation.
	forvalues i = 1/15 {
		egen `prog'_rec_w`i'_temp = mean(`prog'_rec) if swave==`i', ///
			by(spanel ssuid shhadid)
		egen `prog'_rec_w`i' = max(`prog'_rec_w`i'_temp), by(spanel ssuid shhadid)
		drop `prog'_rec_w`i'_temp
	}

	** Flag households with no receipt in first X waves
	forvalues k = 2/6 {
		gen no`prog'_first`k'waves = .
	}
	
	* For a household first observed in wave j, the flag for the first k waves is 1
	* when receipt is zero in each of waves j through j+k-1
	forvalues j = 1/7 {
		local norec "first_wave_tmp==`j'"
		
		forvalues k = 1/6 {
			local w = `j' + `k' - 1
			local norec "`norec' & `prog'_rec_w`w'==0"
			
			if `k'>=2 {
				replace no`prog'_first`k'waves = 1 if `norec'
			}
		}
	}

	* Every flag not set above becomes 0. Each flag is tested against itself:
	* resetting the 2- and 3-wave flags based on the 4-wave flag would erase
	* households with no receipt in the first 2 or 3 waves but receipt by wave 4.
	forvalues k = 2/6 {
		replace no`prog'_first`k'waves = 0 if no`prog'_first`k'waves==.
	}

	** Construct month variable, identify month first receiving, and calculate months since receipt
	gen month_`prog'rec = month if `prog'_rec==1
	egen month_first`prog' = min(month_`prog'rec), by(spanel ssuid shhadid)
	gen months_since`prog' = month - month_first`prog'
	egen months_since`prog'_min = min(months_since`prog'), by(spanel ssuid shhadid)		
	egen months_since`prog'_max = max(months_since`prog'), by(spanel ssuid shhadid)	
	
	compress
}

drop *_rec_w*
drop first_wave_tmp

**** Adjust for inflation (using PCE)
* Reads the monthly PCE price index, merges it on calendar year and month, and
* rescales the income variables so that July 2013 is the base month.
preserve

	** Import and clean raw data
	import delimited "${dir_pce}/PCEPI.csv", clear

	* Extract year/month from the date string (characters 1-4 and 6-7)
	gen rhcalyr = substr(date,1,4)
	gen rhcalmn = substr(date,6,2)
	destring rhcalyr rhcalmn, replace
		
	* Rename PCE variable
	rename pcepi pce

	* Save
	compress
	save "${temp}/pce", replace
		
restore
		
** Merge with PCE, keeping every record of the data in memory and dropping
** index months that match no record
merge m:1 rhcalyr rhcalmn using "${temp}/pce", keep(master match) nogenerate
		
** Generate July 2013 multiplier
sum pce if rhcalyr==2013 & rhcalmn==7
global pce_base = r(mean)
		
gen pce_multiplier = ${pce_base}/pce

** Adjust dollars to 2013 dollars
* NOTE(review): the wildcards below also match any variable whose name starts
* with thmktinc or thallinc, such as a *_povshare ratio if one exists at this
* point -- confirm that rescaling those variables is intended
foreach v of varlist thmktinc* thallinc* {
	quietly replace `v' = `v' * pce_multiplier
}

**** Merge in state unemployment rate
* Builds a state-by-month unemployment rate file using the same combined-state
* codes as the main data, then merges it on state, calendar year, and month.
preserve

	** Import raw data and discard the first eight rows of the sheet
	import excel "${dir_stateunemp}/state_unemployment_through2023", clear
	drop in 1/8

	** Rename the spreadsheet columns that are used and keep only those
	rename (A C D E K) (tfipsst rhcalyr rhcalmn pop_civnoninst unemp_rate)
	keep tfipsst rhcalyr rhcalmn pop_civnoninst unemp_rate

	** Drop non-states (codes are still strings at this point)
	drop if tfipsst=="037" | tfipsst=="51000"

	** Destring
	destring, replace

	** Collapse states per groups 
	* 61 = ME, VT combined; 62 = ND, SD, WY combined
	recode tfipsst (23 50 = 61) (38 46 56 = 62)

	* Weighted mean of the rate within each state group and month
	collapse (mean) unemp_rate [iw=pop_civnoninst], by(tfipsst rhcalyr rhcalmn)

	** Save temporary file
	compress
	save "${temp}/unemp_rate", replace

restore

** Merge with unemployment data, keeping every record of the data in memory and
** dropping state-months that match no record
merge m:1 tfipsst rhcalyr rhcalmn using "${temp}/unemp_rate", keep(master match) nogenerate

**** Calculate new variables	
** Year-month	
* Monthly date built from calendar year and month, displayed in %tm format
gen year_month = ym(rhcalyr, rhcalmn)
format year_month %tm

** Unique household ID
* One integer per combination of panel, sample unit, and household address ID
egen hh_id = group(spanel ssuid shhadid)

**** Shorten Medicaid variables
* Replace "medicaid" with "med" in the three families of variable names
foreach stem in no months_since {
	rename `stem'medicaid* `stem'med*
}
rename medicaid_* med_*

**** Save 
compress
save "${dir_data}/sipp_sample_analysis", replace








